library(tidyverse)
library(data.table)
library(texreg)
# "Not in" operator: TRUE for each element of `x` that is absent from `table`.
`%nin%` <- function(x, table) !(x %in% table)

# Shared plot styling, added to every figure below via `+ theme`.
# NOTE: this object shadows the name of ggplot2's theme() function. Calls of
# the form theme(...) still resolve to the function, but the namespace is
# spelled out here to make that explicit.
theme <- theme_minimal() +
  ggplot2::theme(
    text = element_text(size = 12),
    axis.title = element_text(size = 11),
    # Legend sits above the panel and carries no title.
    legend.position = "top",
    legend.title = element_blank(),
    # Keep only the major horizontal grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

# Figure 2 ####

fig2 <- fread("data/fig2.csv")

# Per-row signed follow ratio for rows with french_speaking == 0:
#   negative -> Canada count is larger (value = -(Canada / United States))
#   positive -> United States count is larger (value = United States / Canada)
#   zero     -> both counts are equal
ggdat_fig2 <- fig2 %>%
  filter(french_speaking == 0) %>%
  mutate(
    ratio = case_when(
      Canada == `United States` ~ 0,
      Canada > `United States` ~ -(Canada / `United States`),
      Canada < `United States` ~ `United States` / Canada
    ),
    # Collapse near-parity (|ratio| < 1.1) to 0 and cap the tails at +/-10.
    ratio = case_when(
      abs(ratio) < 1.1 ~ 0,
      ratio < -10 ~ -10,
      ratio > 10 ~ 10,
      TRUE ~ ratio
    ),
    # Fill category; the level order fixes the legend and colour order.
    color = factor(
      case_when(
        ratio > 0 ~ "More Americans",
        ratio < 0 ~ "More Canadians",
        TRUE ~ "Same"
      ),
      levels = c("More Canadians", "Same", "More Americans")
    )
  )

# Figure 2: share of the filtered sample at each (rounded) follow ratio,
# filled by which country is followed more. Saved to outputs/fig2.png.
# `color` is already a factor with the plotting level order (set when
# ggdat_fig2 is built), so it is not re-levelled here.
ggdat_fig2 %>%
  # Round so rows fall into whole-number ratio bins on the x axis.
  mutate(ratio = round(ratio)) %>%
  ggplot(aes(x = ratio, fill = color)) +
  # after_stat() replaces the deprecated `..count..` notation; the bar height
  # is each bin's count divided by the total count.
  geom_bar(aes(y = after_stat(count / sum(count))), col = "black") +
  scale_x_continuous(
    breaks = c(-10, -5, -2, 0, 2, 5, 10),
    labels = c(
      "10x more\nor greater", "5x more", "2x more", "Equal",
      "2x more", "5x more", "10x more\nor greater"
    )
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # Fill colours follow the factor level order of `color`.
  scale_fill_manual(values = c("#FF0000", "white", "#3C3B6E")) +
  labs(
    x = "Ratio of Canadian to American follows on Twitter",
    y = "Percentage of total Canadian sample"
  ) +
  theme +
  # Arguments are positional (colour, width, linetype) because the name of
  # the width argument differs between ggplot2 versions.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.ticks.x = element_line("black", 1, "solid")
  )

# ggsave() writes the most recently created plot.
ggsave("outputs/fig2.png", width = 6.5, height = 4.5, units = "in")
        
# Median and total per country column, plus each column's share of the
# grand total.
ggdat_fig2 %>%
  pivot_longer(
    cols = c("United States", "Canada", "Other"),
    names_to = "name",
    values_to = "value"
  ) %>%
  group_by(name) %>%
  summarize(
    median = median(value),
    total = sum(value),
    .groups = "drop"
  ) %>%
  mutate(
    overall = sum(total),
    prop = total / overall
  )

# Number and share of rows in each fill category.
ggdat_fig2 %>%
  count(color) %>%
  mutate(
    overall = sum(n),
    prop = n / overall
  )

# Share of rows sitting at the upper cap (ratio == 10).
ggdat_fig2 %>%
  summarize(
    n = n(),
    total_10 = sum(as.numeric(ratio == 10))
  ) %>%
  mutate(prop = total_10 / n)
  

# Figure 3 ####

fig3 <- read.csv("data/fig3.csv")

# Plot data for Figure 3: the incoming `total` column is renamed to `n`, and
# a new `total` holds the sum of `n` within each label (used as the
# annotation above each bar).
ggdat_fig3 <- fig3 %>%
  rename(n = total) %>%
  mutate(
    # Level order fixes the left-to-right order of the bars.
    label = factor(
      label,
      levels = c(
        "Follows",
        "All retweets",
        "COVID-19\nretweets",
        "Misinformation/\nmisinformation\ndebate retweets"
      )
    ),
    # Level order fixes the stacking and legend order.
    country = factor(country, levels = c("Canada", "Other", "United States"))
  ) %>%
  group_by(label) %>%
  mutate(total = sum(n)) %>%
  # Drop the grouping so it does not leak into later uses of ggdat_fig3.
  ungroup()

# Figure 3: 100%-stacked bars of `n` by country for each label, with the
# per-label total printed above each bar. Saved to outputs/fig3.png.
ggdat_fig3 %>%
  ggplot(aes(x = label, y = n, fill = country)) +
  geom_col(position = "fill", col = "black", width = 0.8) +
  # Print the total once per bar (on the "Other" row only) at y = 1.1,
  # just above the filled bar. The stray trailing comma that passed an empty
  # argument to geom_text() has been removed.
  geom_text(
    aes(label = ifelse(country == "Other", scales::comma(total), "")),
    y = 1.1,
    col = "black"
  ) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = seq(0, 1, 0.1)
  ) +
  scale_fill_manual(
    values = c("#FF0000", "white", "#3C3B6E"),
    guide = guide_legend(reverse = FALSE)
  ) +
  labs(y = "Percentage per location-\ncategory of original tweets", x = "") +
  theme +
  # Extend the y range past 1 so the annotations are not clipped.
  coord_cartesian(ylim = c(0, 1.1))

# ggsave() writes the most recently created plot.
ggsave("outputs/fig3.png", width = 6.5, height = 4.5, units = "in")

# One row per label with the Canada / Other / United States counts side by
# side, their total, both ratios between Canada and the United States, and
# each of those two countries' share of the total.
ggdat_fig3 %>%
  pivot_wider(id_cols = label, names_from = country, values_from = n) %>%
  mutate(
    total = Canada + Other + `United States`,
    # Previously both ratios were assigned to `ratio_US`, so the
    # Canada / United States ratio was silently overwritten.
    ratio_CA = Canada / `United States`,
    ratio_US = `United States` / Canada,
    prop_US = `United States` / total,
    prop_CA = Canada / total,
    # Constant group column; the former filter on it kept every row.
    group = factor(
      "United States",
      levels = c("United States", "United Kingdom")
    )
  )

# Regression ####

# Linear regression of `misinfo_sds` on `us_sds`, printed as a text table.
# The earlier version ended the pipe after a leftover `head()` call, so
# `lm(data = ., ...)` ran as a separate statement with `.` undefined.
fread("data/misinfo_sds.csv") %>%
  lm(misinfo_sds ~ us_sds, data = .) %>%
  texreg::screenreg(digits = 3, single.row = TRUE)

# Supplement ####

# Plot data for the supplementary figure: short label codes are replaced by
# multi-line axis labels, the United States / Canada ratio is computed per
# label, and the counts are returned to long form (`country`, `value`).
ggdat_rt_sensitivity <- fread("data/supp_fig1.csv") %>%
  mutate(
    label = case_when(
      label == "covid1" ~ "covid or\ncoronavirus",
      label == "covid2" ~ "covid,\ncoronavirus,\nor pandemic",
      label == "covid3" ~ "covid,\ncoronavirus,\npandemic, virus,\nor lockdown",
      label == "misinfo1a" ~ "misinfo_og+\ncovid or\ncoronavirus",
      label == "misinfo1b" ~ "misinfo_og+\ncovid,\ncoronavirus,\nor pandemic",
      label == "misinfo1c" ~ "misinfo_og+\ncovid,\ncoronavirus,\npandemic, virus,\nor lockdown",
      label == "misinfo2a" ~ "misinfo_mod+\ncovid or\ncoronavirus",
      label == "misinfo2b" ~ "misinfo_mod+\ncovid,\ncoronavirus,\nor pandemic",
      label == "misinfo2c" ~ "misinfo_mod+\ncovid,\ncoronavirus,\npandemic, virus,\nor lockdown",
      # Keep any other label as-is. Without this fallback unmapped labels
      # become NA, which merges them into one id in pivot_wider() and makes
      # later filtering by label name impossible.
      TRUE ~ label
    )
  ) %>%
  pivot_wider(id_cols = "label", values_from = "n", names_from = "country") %>%
  mutate(ratio = `United States` / Canada) %>%
  pivot_longer(
    cols = c("Canada", "Other", "United States"),
    names_to = "country"
  )

# Supplementary figure: 100%-stacked bars of `value` by country for each
# label, one facet row per label family. Saved to outputs/supp_fig1.png.
ggdat_rt_sensitivity %>%
  # Exclude the "All follows" / "All retweets" labels from this figure.
  filter(label %nin% c("All follows", "All retweets")) %>%
  # Facet row: labels containing "_og" -> 2, "_mod" -> 3, all others -> 1.
  mutate(
    row = case_when(
      str_detect(label, "_og") ~ 2,
      str_detect(label, "_mod") ~ 3,
      TRUE ~ 1
    )
  ) %>%
  ggplot(aes(x = label, y = value, fill = country)) +
  geom_col(position = "fill", col = "black", width = 0.6) +
  theme +
  # Counts in thousands for Canada (above the bar, y = 1.1) and the United
  # States (below the bar, y = -0.1); "Other" gets no text. The stray
  # trailing comma that passed an empty argument to geom_text() is removed.
  geom_text(
    aes(
      label = ifelse(
        country != "Other",
        paste0(round(value / 1000, 1), "k"),
        NA
      ),
      y = case_when(
        country == "Canada" ~ 1.1,
        country == "United States" ~ -0.1,
        TRUE ~ 0.7
      )
    ),
    col = "black"
  ) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = seq(0, 1, 0.2)
  ) +
  scale_fill_manual(
    values = c("#FF0000", "white", "#3C3B6E"),
    guide = guide_legend(reverse = FALSE)
  ) +
  labs(x = "", y = "") +
  facet_wrap(row ~ ., scales = "free_x", nrow = 3) +
  # Hide the facet strips; the row index is only a layout device.
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

# ggsave() writes the most recently created plot.
ggsave("outputs/supp_fig1.png", width = 6.5, height = 8, units = "in")
  